### 7 Clean df to get the two samples ####
#### 7.1 All households sample ####
# We reduce the sample step by step s
# so we can trace out the change in sample size after each step
# Drop if lives in student housing
df_all_cu <- df %>% filter(CUTENURE_t != 6)
# Drop if age_ref < 21 or age2 < 21 (or > 85)
df_all_cu <- df_all_cu %>% filter(AGE_REF_t >= 21 & AGE_REF_t <= 85)
df_all_cu <- df_all_cu %>% filter(is.na(AGE2_t) | (AGE2_t >= 21 & AGE2_t <= 85))
# Drop if age_ref change is greater than 1 or less than 0 (if the sex of the reference person is the same)
df_all_cu$drop <- ifelse(((df_all_cu$d_AGE_REF_t > 1 | df_all_cu$d_AGE_REF_t < 0) & df_all_cu$SEX_REF_t == df_all_cu$SEX_REF_tm1), 1, 0)
df_all_cu <- df_all_cu %>% filter(df_all_cu$drop==0)
#  Drop if age2 change is greater than 1 or less than 0 (if the reference person has the same sex or marital status)
df_all_cu$drop <- ifelse(((df_all_cu$d_AGE_2 > 1 | df_all_cu$d_AGE_2 < 0) & df_all_cu$SEX_REF_t == df_all_cu$SEX_REF_tm1
& df_all_cu$MARITAL1_t == df_all_cu$MARITAL1_tm1), 1, 0)
df_all_cu <- df_all_cu %>% filter(df_all_cu$drop==0|is.na(df_all_cu$drop))
# Drop if change in family size is greater than or less than 3 in absolute values
df_all_cu <- df_all_cu %>% filter (d_FAM_SIZE_t <= 3 & d_FAM_SIZE_t >= -3)
# drop bottom 1 percent of CUs in terms of non-durable consumption in each month after adjustment for CU size
df_all_cu$EX_N_PC <- df_all_cu$EX_N_t / (df_all_cu$NUM_ADTS_t + 0.6 *df_all_cu$NUM_KIDS_t)
df_all_cu <- df_all_cu %>% mutate(
TT = ifelse(YYMM==2010,0,
ifelse(YYMM==2011,1,
ifelse(YYMM==2012,2,
ifelse(YYMM==2101,3,
ifelse(YYMM==2102,4,
ifelse(YYMM==2103,5,
ifelse(YYMM==2104,6,
ifelse(YYMM==2105,7,
ifelse(YYMM==2106,8,
ifelse(YYMM==2107,9,
ifelse(YYMM==2108,10,11))))))))))))
# Quantile regression of per capita consumption on time trend for the bottom 1%
qr_bot <- rq(data=df_all_cu,EX_N_PC~TT,tau=0.01)
summary(qr_bot)
df_all_cu$fit_val_bot <- qr_bot[["fitted.values"]]
df_all_cu$drop <- ifelse(df_all_cu$fit_val_bot > df_all_cu$EX_N_PC, 1, 0)
df_all_cu <- df_all_cu %>% filter(df_all_cu$drop==0) %>% select(-c(EX_N_PC,fit_val_bot,drop,TT))
write.csv(df_all_cu,"df_all_cu.csv",row.names = FALSE)
#### 7.2 Final Panel ####
# drop if lives in student housing
df_f <- df %>% filter(CUTENURE_t != 6)
# drop if age_ref < 21 or age2 < 21
df_f <- df_f %>% filter(AGE_REF_t >= 21)
df_f <- df_f %>% filter(is.na(AGE2_t) | AGE2_t >= 21)
# drop if age_ref change is greater than 1 or less than 0 (if the sex of the reference person is the same)
df_f$drop <- ifelse(((df_f$d_AGE_REF_t > 1 | df_f$d_AGE_REF_t < 0) & df_f$SEX_REF_t == df_f$SEX_REF_tm1), 1, 0)
df_f <- df_f %>% filter(df_f$drop==0)
#  drop if age2 change is greater than 1 or less than 0 (if the reference person has the same sex or marital status)
df_f$drop <- ifelse(((df_f$d_AGE_2 > 1 | df_f$d_AGE_2 < 0) & df_f$SEX_REF_t == df_f$SEX_REF_tm1
& df_f$MARITAL1_t == df_f$MARITAL1_tm1), 1, 0)
df_f <- df_f %>% filter(df_f$drop==0|is.na(df_f$drop))
# drop if change in family size is greater than or less than 3 in absolute values
df_f <- df_f %>% filter (d_FAM_SIZE_t <= 3 & d_FAM_SIZE_t >= -3)
# drop bottom 1 percent of CUs in terms of non-durable consumption in each month after adjustment for CU size
df_f$EX_N_PC <- df_f$EX_N_t / (df_f$NUM_ADTS_t + 0.6 *df_f$NUM_KIDS_t)
df_f %>%
group_by(YYMM) %>%
summarize(quant1 = quantile(EX_N_PC, probs = 0.01)) %>%
ungroup()
df_f$drop <- ifelse((df_f$YYMM == 2010 & df_f$EX_N_PC <= 922), 1,
ifelse((df_f$YYMM == 2011 & df_f$EX_N_PC <= 662), 1,
ifelse((df_f$YYMM == 2012 & df_f$EX_N_PC <= 787), 1,
ifelse((df_f$YYMM == 2101 & df_f$EX_N_PC <= 751), 1,
ifelse((df_f$YYMM == 2102 & df_f$EX_N_PC <= 782), 1,
ifelse((df_f$YYMM == 2103 & df_f$EX_N_PC <= 760), 1,
ifelse((df_f$YYMM == 2104 & df_f$EX_N_PC <= 798), 1,
ifelse((df_f$YYMM == 2105 & df_f$EX_N_PC <= 727), 1,
ifelse((df_f$YYMM == 2106 & df_f$EX_N_PC <= 679), 1,
ifelse((df_f$YYMM == 2107 & df_f$EX_N_PC <= 713), 1,
ifelse((df_f$YYMM == 2108 & df_f$EX_N_PC <= 541), 1,
ifelse((df_f$YYMM == 2109 & df_f$EX_N_PC <= 632), 1, 0
))))))))))))
df_f <- df_f %>% filter(df_f$drop==0)
# Drop high income
df_f$MARITAL_t <- ifelse(df_f$MARITAL1_t == 1, 1, 0)
#### Income cutoff table ####
# For single, without kids
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 75000 & FINCBTXM_FST > 50000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 100000 & FINCBTXM_FST > 75000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 125000 & FINCBTXM_FST > 100000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)
# For single, with kids
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 125000 & FINCBTXM_FST > 100000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)
# For married couple, no kids
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)
#225,000
# For married couple, with kids
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)
# 225,000
# For adults, no kids
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 450000 & FINCBTXM_FST > 425000)
table(check$r)
#425,000
# For adults, with kids
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)
# cutoff
df_f$drop <- ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t==1 & df_f$NUM_KIDS_t ==0 & df_f$FINCBTXM_FST <= 125000), 0,
ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t==1 & df_f$NUM_KIDS_t >0 & df_f$FINCBTXM_FST <= 275000), 0,
ifelse((df_f$MARITAL_t==1 & df_f$NUM_KIDS_t ==0 & df_f$FINCBTXM_FST <= 225000), 0,
ifelse((df_f$MARITAL_t==1 & df_f$NUM_KIDS_t >0 & df_f$FINCBTXM_FST <= 225000), 0,
ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t > 1 & df_f$NUM_KIDS_t ==0 & df_f$FINCBTXM_FST <= 425000), 0,
ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t > 1 & df_f$NUM_KIDS_t >0 & df_f$FINCBTXM_FST <= 425000), 0, 1))))))
df_f <- df_f %>% filter(df_f$drop==0) %>% select(-c(EX_N_PC,drop, MARITAL_t))
write.csv(df_f,"df_f.csv", row.names = FALSE)
### 8 Create imputed value of EIP3 ###
# Imprt imputed EIP data
imput_eip <- read_excel("Raw data/imputed_eip.xlsx")
#### 8.1 Restrict df_f to only observations on the 2105 or 2106 interview cycle
df_f_imp <- df_f %>% group_by(ID) %>% filter(any(YYMM == 2105 | YYMM == 2106 | YYMM == 2008 | YYMM == 2009 |
YYMM == 2011 | YYMM == 2012 | YYMM == 2102 | YYMM == 2103 |
YYMM == 2108 | YYMM == 2109 | YYMM == 2111 | YYMM == 2112))
#### 8.2 Mege df_f_imp with imputed values in IMPUT_EIP.xlsx
####    These imputations were created using tax unit level data on AGI and number of dependents internal to the BLS
df_f_imp <- merge(df_f_imp, imput_eip, by.x="NEWID", by.y = "newid")
#### 8.3 Create imputed value for EIPIII_t
df_f_imp <- df_f_imp %>% mutate(imp_eip3t = ifelse(EIPIII_t == imp_eip3_1, imp_eip3_1,
ifelse(EIPIII_t == imp_eip3_2, imp_eip3_2,
ifelse(EIPIII_t == imp_eip3_3, imp_eip3_3,
ifelse(EIPIII_t == imp_eip3_4, imp_eip3_4,0)))))
df_f_imp <- df_f_imp %>% mutate(imp_eip3t = ifelse((YYMM == 2105 | YYMM == 2106) & EIPIII_t == 0,
ifelse(INTERI == 1,imp_eip3_1,
ifelse(INTERI == 2,imp_eip3_2,
ifelse(INTERI == 3,imp_eip3_3,
ifelse(INTERI == 4,imp_eip3_4,-100)))),imp_eip3t))
df_f_imp <- df_f_imp %>% mutate(imp_eip3t = ifelse((YYMM == 2105 | YYMM == 2106) & EIPIII_t > 0 & imp_eip3t == 0,
ifelse(INTERI == 1,imp_eip3_1,
ifelse(INTERI == 2,imp_eip3_2,
ifelse(INTERI == 3,imp_eip3_3,
ifelse(INTERI == 4,imp_eip3_4,-200)))),imp_eip3t))
#### 8.4 Create imputed value for EIPIII_tm1
df_f_imp <- df_f_imp %>% group_by(ID) %>% arrange(YYMM, .by_group = TRUE) %>%
mutate(imp_eip3tm1 = lag(imp_eip3t, n=1, default = 0),
imp_eip3tm2 = lag(imp_eip3t, n=2, default = 0),
imp_eip3tm3 = lag(imp_eip3t, n=3, default = 0))
#### 8.5 Assign lagged imputed EIPIII value to CUs without observation in 2105 or 2006
df_f_imp <- df_f_imp %>% mutate(imp_eip3tm1 = ifelse((YYMM == 2108 | YYMM == 2109) & imp_eip3tm1 == 0,
ifelse(INTERI == 1, NA,
ifelse(INTERI == 2, imp_eip3_1,
ifelse(INTERI == 3, imp_eip3_2,
ifelse(INTERI == 4, imp_eip3_3,-300)))),imp_eip3tm1))
df_f_imp <- df_f_imp %>% mutate(imp_eip3tm2 = ifelse((YYMM == 2111 | YYMM == 2112) & imp_eip3tm2 == 0,
ifelse(INTERI == 1, NA,
ifelse(INTERI == 2, NA,
ifelse(INTERI == 3, imp_eip3_1,
ifelse(INTERI == 4, imp_eip3_2,-300)))),imp_eip3tm2))
#### 8.6 Create categorical representation of observed and imputed EIP value
df_f_imp <- df_f_imp %>% mutate(eip3t_cat = ifelse(EIPIII_t > 0 & EIPIII_t < 1400,1,
ifelse(EIPIII_t == 1400,2,
ifelse(EIPIII_t > 1400 & EIPIII_t < 2800,3,
ifelse(EIPIII_t == 2800,4,
ifelse(EIPIII_t > 2800 & EIPIII_t < 4200,5,
ifelse(EIPIII_t == 4200,6,
ifelse(EIPIII_t > 4200 & EIPIII_t < 5600,7,
ifelse(EIPIII_t == 5600,8,
ifelse(EIPIII_t > 5600,9,0))))))))))
df_f_imp <- df_f_imp %>% mutate(impeip3t_cat = ifelse(imp_eip3t > 0 & imp_eip3t < 1400,1,
ifelse(imp_eip3t == 1400,2,
ifelse(imp_eip3t > 1400 & imp_eip3t < 2800,3,
ifelse(imp_eip3t == 2800,4,
ifelse(imp_eip3t > 2800 & imp_eip3t < 4200,5,
ifelse(imp_eip3t == 4200,6,
ifelse(imp_eip3t > 4200 & imp_eip3t < 5600,7,
ifelse(imp_eip3t == 5600,8,
ifelse(imp_eip3t > 5600,9,0))))))))))
df_f_imp <- df_f_imp %>% select(-c(imp_eip2_1,imp_eip2_2,imp_eip2_3,imp_eip2_4,imp_eip3_1,imp_eip3_2,imp_eip3_3,imp_eip3_4))
write.csv(df_f_imp,"df_f_imp.csv", row.names = FALSE)
# Keep only df, all CU sample, the final sample, and the final sample with imputations
rm(list=setdiff(ls(), c("df","df_all_cu","df_f","df_f_imp")))
df_f_imp <- read_csv("df_f_imp.csv")
df_f_imp$ID <- as.double(df_f_imp$ID)
## Labeling observations as treated and untreated
df_f_imp <- df_f_imp %>% mutate(
tr_ob = ifelse((EIPIII_t>0 | EIPIII_tm1 >0 | EIPIII_tm2 > 0 | EIPIII_tm3 > 0), 1, 0),
tr_ob_imp = ifelse((imp_eip3t>0 | imp_eip3tm1 >0 | imp_eip3tm2 > 0 | imp_eip3tm3 > 0), 1, 0))
# dummies
df_f_imp <- df_f_imp %>% mutate(
iEIPI_t = ifelse(EIPI_t>0,1,0),
iEIPII_t = ifelse(EIPII_t>0,1,0),
iEIPIII_t = ifelse(EIPIII_t>0,1,0),
iEIPI_tm1 = ifelse(EIPI_tm1>0,1,0),
iEIPII_tm1 = ifelse(EIPII_tm1>0,1,0),
iEIPIII_tm1 = ifelse(EIPIII_tm1>0,1,0),
impiEIPIII_t = ifelse(imp_eip3t>0,1,0),
impiEIPIII_tm1 = ifelse(imp_eip3tm1>0,1,0))
# Scaling
## Food ##
df_f_sc_fd <- df_f_imp
df_f_sc_fd <- df_f_sc_fd %>% mutate(
s_d_EX_FD_t = d_EX_FD_t / EX_FD_AVG,
Y = s_d_EX_FD_t,
s_AGE_AVG_t = AGE_AVG_t / EX_FD_AVG,
s_d_NUM_ADTS_t = d_NUM_ADTS_t / EX_FD_AVG,
s_d_NUM_KIDS_t = d_NUM_KIDS_t / EX_FD_AVG,
s_EIPI_t = EIPI_t / EX_FD_AVG,
s_EIPI_tm1 = EIPI_tm1 / EX_FD_AVG,
s_iEIPI_t = iEIPI_t / EX_FD_AVG,
s_iEIPI_tm1 = iEIPI_tm1 / EX_FD_AVG,
s_EIPII_t = EIPII_t / EX_FD_AVG,
s_EIPII_tm1 = EIPII_tm1 / EX_FD_AVG,
s_iEIPII_t = iEIPII_t / EX_FD_AVG,
s_iEIPII_tm1 = iEIPII_tm1 / EX_FD_AVG,
s_EIPIII_t = EIPIII_t / EX_FD_AVG,
s_EIPIII_tm1 = EIPIII_tm1 / EX_FD_AVG,
s_iEIPIII_t = iEIPIII_t / EX_FD_AVG,
s_iEIPIII_tm1 = iEIPIII_tm1 / EX_FD_AVG,
s_impEIPIII_t = imp_eip3t / EX_FD_AVG,
s_impEIPIII_tm1 = imp_eip3tm1 / EX_FD_AVG,
s_impiEIPIII_t = impiEIPIII_t / EX_FD_AVG,
s_impiEIPIII_tm1 = impiEIPIII_tm1 / EX_FD_AVG)
# Drop outliers 446787, 465268 (about 1.5 as large, not drop here)
df_f_sc_fd <- df_f_sc_fd %>% filter(ID!=446787)
# Scaling
## Strict non-durabls ##
df_f_sc_sn <- df_f_imp
df_f_sc_sn <- df_f_sc_sn %>% mutate(
s_d_EX_SN_t = d_EX_SN_t / EX_SN_AVG ,
Y = s_d_EX_SN_t ,
s_AGE_AVG_t = AGE_AVG_t / EX_SN_AVG ,
s_d_NUM_ADTS_t = d_NUM_ADTS_t / EX_SN_AVG ,
s_d_NUM_KIDS_t = d_NUM_KIDS_t / EX_SN_AVG ,
s_EIPI_t = EIPI_t / EX_SN_AVG ,
s_EIPI_tm1 = EIPI_tm1 / EX_SN_AVG ,
s_iEIPI_t = iEIPI_t / EX_SN_AVG ,
s_iEIPI_tm1 = iEIPI_tm1 / EX_SN_AVG ,
s_EIPII_t = EIPII_t / EX_SN_AVG ,
s_EIPII_tm1 = EIPII_tm1 / EX_SN_AVG ,
s_iEIPII_t = iEIPII_t / EX_SN_AVG ,
s_iEIPII_tm1 = iEIPII_tm1 / EX_SN_AVG ,
s_EIPIII_t = EIPIII_t / EX_SN_AVG ,
s_EIPIII_tm1 = EIPIII_tm1 / EX_SN_AVG ,
s_iEIPIII_t = iEIPIII_t / EX_SN_AVG ,
s_iEIPIII_tm1 = iEIPIII_tm1 / EX_SN_AVG ,
s_impEIPIII_t = imp_eip3t / EX_SN_AVG,
s_impEIPIII_tm1 = imp_eip3tm1 / EX_SN_AVG,
s_impiEIPIII_t = impiEIPIII_t / EX_SN_AVG,
s_impiEIPIII_tm1 = impiEIPIII_tm1 / EX_SN_AVG)
# Drop outliers 465865
df_f_sc_sn <- df_f_sc_sn %>% filter(ID!=465865)
# Scaling
## Non-durabls ##
df_f_sc_n <- df_f_imp
df_f_sc_n <- df_f_sc_n %>% mutate(
s_d_EX_N_t = d_EX_N_t / EX_N_AVG,
Y = s_d_EX_N_t,
s_AGE_AVG_t = AGE_AVG_t / EX_N_AVG,
s_d_NUM_ADTS_t = d_NUM_ADTS_t / EX_N_AVG,
s_d_NUM_KIDS_t = d_NUM_KIDS_t / EX_N_AVG,
s_EIPI_t = EIPI_t / EX_N_AVG,
s_EIPI_tm1 = EIPI_tm1 / EX_N_AVG,
s_iEIPI_t = iEIPI_t / EX_N_AVG,
s_iEIPI_tm1 = iEIPI_tm1 / EX_N_AVG,
s_EIPII_t = EIPII_t / EX_N_AVG,
s_EIPII_tm1 = EIPII_tm1 / EX_N_AVG,
s_iEIPII_t = iEIPII_t / EX_N_AVG,
s_iEIPII_tm1 = iEIPII_tm1 / EX_N_AVG,
s_EIPIII_t = EIPIII_t / EX_N_AVG,
s_EIPIII_tm1 = EIPIII_tm1 / EX_N_AVG,
s_iEIPIII_t = iEIPIII_t / EX_N_AVG,
s_iEIPIII_tm1 = iEIPIII_tm1 / EX_N_AVG,
s_impEIPIII_t = imp_eip3t / EX_N_AVG,
s_impEIPIII_tm1 = imp_eip3tm1 / EX_N_AVG,
s_impiEIPIII_t = impiEIPIII_t / EX_N_AVG,
s_impiEIPIII_tm1 = impiEIPIII_tm1 / EX_N_AVG)
# Scaling
## Total expenditure ##
df_f_sc_t <- df_f_imp
df_f_sc_t <- df_f_sc_t %>% mutate(
s_d_EX_T_t = d_EX_T_t / EX_T_AVG,
Y = s_d_EX_T_t,
s_AGE_AVG_t = AGE_AVG_t / EX_T_AVG,
s_d_NUM_ADTS_t = d_NUM_ADTS_t / EX_T_AVG,
s_d_NUM_KIDS_t = d_NUM_KIDS_t / EX_T_AVG,
s_EIPI_t = EIPI_t / EX_T_AVG,
s_EIPI_tm1 = EIPI_tm1 / EX_T_AVG,
s_iEIPI_t = iEIPI_t / EX_T_AVG,
s_iEIPI_tm1 = iEIPI_tm1 / EX_T_AVG,
s_EIPII_t = EIPII_t / EX_T_AVG,
s_EIPII_tm1 = EIPII_tm1 / EX_T_AVG,
s_iEIPII_t = iEIPII_t / EX_T_AVG,
s_iEIPII_tm1 = iEIPII_tm1 / EX_T_AVG,
s_EIPIII_t = EIPIII_t / EX_T_AVG,
s_EIPIII_tm1 = EIPIII_tm1 / EX_T_AVG,
s_iEIPIII_t = iEIPIII_t / EX_T_AVG,
s_iEIPIII_tm1 = iEIPIII_tm1 / EX_T_AVG,
s_impEIPIII_t = imp_eip3t / EX_T_AVG,
s_impEIPIII_tm1 = imp_eip3tm1 / EX_T_AVG,
s_impiEIPIII_t = impiEIPIII_t / EX_T_AVG,
s_impiEIPIII_tm1 = impiEIPIII_tm1 / EX_T_AVG)
# Drop outliers 446783, 455209
df_f_sc_t <- df_f_sc_t %>% filter(ID!=446787 & ID!=455209)
# Create datasets without imputed EIP outliers
#   drop imp_eip > 10212 = (3531+3*2227)
df_f_sc_fd_v2 <- df_f_sc_fd %>% filter(imp_eip3t < 10212)
df_f_sc_sn_v2 <- df_f_sc_sn %>% filter(imp_eip3t < 10212)
df_f_sc_n_v2 <- df_f_sc_n %>% filter(imp_eip3t < 10212)
df_f_sc_t_v2 <- df_f_sc_t %>% filter(imp_eip3t < 10212)
# Create copys
df_f_sc_fd_copy <- df_f_sc_fd
df_f_sc_sn_copy <- df_f_sc_sn
df_f_sc_n_copy <- df_f_sc_n
df_f_sc_t_copy <- df_f_sc_t
df_f_sc_fd_v2_copy <- df_f_sc_fd_v2
df_f_sc_sn_v2_copy <- df_f_sc_sn_v2
df_f_sc_n_v2_copy <- df_f_sc_n_v2
df_f_sc_t_v2_copy <- df_f_sc_t_v2
imputation <- function(df){
# Imputation with observed EIP data
df_uno <- df %>% filter(tr_ob==0)
# Step 1: Computing coefficients using only never-treated and not-yet treated units
base <- feols(Y ~ 0 + r + s_EIPI_t + s_EIPII_t + s_EIPI_tm1 + s_EIPII_tm1 +
s_AGE_AVG_t + s_d_NUM_ADTS_t + s_d_NUM_KIDS_t + factor(YYMM),
weights=~FINLWT21_AVG,cluster=~ID, data=df_uno)
# Step 2: Imputing the counter-factual outcomes for all units
df = df %>% mutate(
coeff_r = base$coefficients[1],
coeff_EIPI_t = base$coefficients[2],
coeff_EIPII_t = base$coefficients[3],
coeff_EIPI_tm1 = base$coefficients[4],
coeff_EIPII_tm1 = base$coefficients[5],
coeff_AGE = base$coefficients[6],
coeff_ADTS = base$coefficients[7],
coeff_KIDS = base$coefficients[8],
YYMM_fe = ifelse(YYMM==2011,base$coefficients[9],
ifelse(YYMM==2012,base$coefficients[10],
ifelse(YYMM==2102,base$coefficients[11],
ifelse(YYMM==2103,base$coefficients[12],
ifelse(YYMM==2105,base$coefficients[13],
ifelse(YYMM==2106,base$coefficients[14],
ifelse(YYMM==2108,base$coefficients[15],
ifelse(YYMM==2109,base$coefficients[16],base$coefficients[17])))))))),
Y_0 = coeff_r * r + coeff_EIPI_t * s_EIPI_t + coeff_EIPII_t * s_EIPII_t +
coeff_EIPI_tm1 * s_EIPI_tm1 + coeff_EIPII_tm1 * s_EIPII_tm1 +
coeff_AGE * s_AGE_AVG_t + coeff_ADTS * s_d_NUM_ADTS_t + coeff_KIDS * s_d_NUM_KIDS_t + YYMM_fe,
# Step 3: Differencing the counter-factual from the observed outcome
Y_new = Y - Y_0)
# Imputation with imputed EIP data
df_uno_imp <- df %>% filter(tr_ob_imp==0)
# Step 1: Computing coefficients using only never-treated and not-yet treated units
base_imp <- feols(Y ~ 0 + r + s_EIPI_t + s_EIPII_t + s_EIPI_tm1 + s_EIPII_tm1 +
s_AGE_AVG_t + s_d_NUM_ADTS_t + s_d_NUM_KIDS_t + factor(YYMM),
weights=~FINLWT21_AVG,cluster=~ID, data=df_uno_imp)
# Step 2: Imputing the counter-factual outcomes for all units
df = df %>% mutate(
coeff_r_imp = base_imp$coefficients[1],
coeff_EIPI_t_imp = base_imp$coefficients[2],
coeff_EIPII_t_imp = base_imp$coefficients[3],
coeff_EIPI_tm1_imp = base_imp$coefficients[4],
coeff_EIPII_tm1_imp = base_imp$coefficients[5],
coeff_AGE_imp = base_imp$coefficients[6],
coeff_ADTS_imp = base_imp$coefficients[7],
coeff_KIDS_imp = base_imp$coefficients[8],
YYMM_fe_imp = ifelse(YYMM==2011,base_imp$coefficients[9],
ifelse(YYMM==2012,base_imp$coefficients[10],
ifelse(YYMM==2102,base_imp$coefficients[11],
ifelse(YYMM==2103,base_imp$coefficients[12],
ifelse(YYMM==2105,base_imp$coefficients[13],
ifelse(YYMM==2106,base_imp$coefficients[14],
ifelse(YYMM==2108,base_imp$coefficients[15],
ifelse(YYMM==2109,base_imp$coefficients[16],base_imp$coefficients[17])))))))),
Y_0_imp = coeff_r_imp * r + coeff_EIPI_t_imp * s_EIPI_t + coeff_EIPII_t_imp * s_EIPII_t +
coeff_EIPI_tm1_imp * s_EIPI_tm1 + coeff_EIPII_tm1_imp * s_EIPII_tm1 +
coeff_AGE_imp * s_AGE_AVG_t + coeff_ADTS_imp * s_d_NUM_ADTS_t + coeff_KIDS_imp * s_d_NUM_KIDS_t + YYMM_fe_imp,
# Step 3: Differencing the counter-factual from the observed outcome
Y_new_imp = Y - Y_0_imp)
return(df)
}
df_f_sc_fd <- imputation(df_f_sc_fd_copy) # Y is s_d_EX_FD
df_f_sc_fd_obs <- df_f_sc_fd %>% filter(tr_ob==1)
df_f_sc_fd_imp <- df_f_sc_fd %>% filter(tr_ob_imp==1)
df_f_sc_sn <- imputation(df_f_sc_sn_copy) # Y is s_d_EX_SN
df_f_sc_sn_obs <- df_f_sc_sn %>% filter(tr_ob==1)
df_f_sc_sn_imp <- df_f_sc_sn %>% filter(tr_ob_imp==1)
df_f_sc_n <- imputation(df_f_sc_n_copy) # Y is s_d_EX_N
df_f_sc_n_obs <- df_f_sc_n %>% filter(tr_ob==1)
df_f_sc_n_imp <- df_f_sc_n %>% filter(tr_ob_imp==1)
df_f_sc_t <- imputation(df_f_sc_t_copy) # Y is s_d_EX_T
df_f_sc_t_obs <- df_f_sc_t %>% filter(tr_ob==1)
df_f_sc_t_imp <- df_f_sc_t %>% filter(tr_ob_imp==1)
df_f_sc_fd_v2 <- imputation(df_f_sc_fd_v2_copy) # Y is s_d_EX_FD
df_f_sc_fd_v2_obs <- df_f_sc_fd_v2 %>% filter(tr_ob==1)
df_f_sc_fd_v2_imp <- df_f_sc_fd_v2 %>% filter(tr_ob_imp==1)
df_f_sc_sn_v2 <- imputation(df_f_sc_sn_v2_copy) # Y is s_d_EX_SN
df_f_sc_sn_v2_obs <- df_f_sc_sn_v2 %>% filter(tr_ob==1)
df_f_sc_sn_v2_imp <- df_f_sc_sn_v2 %>% filter(tr_ob_imp==1)
df_f_sc_n_v2 <- imputation(df_f_sc_n_v2_copy) # Y is s_d_EX_N
df_f_sc_n_v2_obs <- df_f_sc_n_v2 %>% filter(tr_ob==1)
df_f_sc_n_v2_imp <- df_f_sc_n_v2 %>% filter(tr_ob_imp==1)
df_f_sc_t_v2 <- imputation(df_f_sc_t_v2_copy) # Y is s_d_EX_T
df_f_sc_t_v2_obs <- df_f_sc_t_v2 %>% filter(tr_ob==1)
df_f_sc_t_v2_imp <- df_f_sc_t_v2 %>% filter(tr_ob_imp==1)
### Table III ####
##### Panel A: Normalized level change - Normalized EIPI amount ####
# Food
n_w_fd_cle_le_to <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_fd_obs)
imp_n_w_fd_cle_le_to <- feols(Y_new ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_fd_imp)
n_w_fd_cle_le_to_v2 <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_fd_v2_obs)
imp_n_w_fd_cle_le_to_v2 <- feols(Y_new_imp ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_fd_v2_imp)
# summary(n_w_fd_cle_le_to)
# Strictly non-durables
n_w_sn_cle_le_to <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_sn_obs)
imp_n_w_sn_cle_le_to <- feols(Y_new_imp ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_sn_imp)
n_w_sn_cle_le_to_v2 <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_sn_v2_obs)
imp_n_w_sn_cle_le_to_v2 <- feols(Y_new_imp ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_sn_v2_imp)
# summary(n_w_sn_cle_le_to)
# Non-durables
n_w_n_cle_le_to <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_n_obs)
imp_n_w_n_cle_le_to <- feols(Y_new_imp ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_n_imp)
n_w_n_cle_le_to_v2 <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_n_v2_obs)
imp_n_w_n_cle_le_to_v2 <- feols(Y_new_imp ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_n_v2_imp)
# summary(n_w_n_cle_le_to)
# Total
n_w_t_cle_le_to <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_t_obs)
imp_n_w_t_cle_le_to <- feols(Y_new_imp ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_t_imp)
n_w_t_cle_le_to_v2 <- feols(Y_new ~ s_EIPIII_t + s_EIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_t_v2_obs)
imp_n_w_t_cle_le_to_v2 <- feols(Y_new_imp ~ s_impEIPIII_t + s_impEIPIII_tm1,
weights=~FINLWT21_AVG,cluster=~ID, data=df_f_sc_t_v2_imp)
# summary(n_w_t_cle_le_to)
etable(n_w_fd_cle_le_to,n_w_sn_cle_le_to,n_w_n_cle_le_to,n_w_t_cle_le_to, style.tex = style.tex("aer"))
etable(imp_n_w_fd_cle_le_to,imp_n_w_sn_cle_le_to,imp_n_w_n_cle_le_to,imp_n_w_t_cle_le_to, style.tex = style.tex("aer"))
etable(imp_n_w_fd_cle_le_to_v2,imp_n_w_sn_cle_le_to_v2,imp_n_w_n_cle_le_to_v2,imp_n_w_t_cle_le_to_v2, style.tex = style.tex("aer"))
